In [1]:
#Mecca.csv
In [303]:
# Imports and global plotting/warnings configuration.
# FIX: numpy and pandas were each imported twice; deduplicated and grouped.
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from pylab import rcParams

rcParams["figure.figsize"] = (30, 18)   # large default figure size for all plots
warnings.filterwarnings("ignore")       # silences library deprecation chatter (hides real warnings too)

# List the available input files (Kaggle-style dataset directory walk).
for dirname, _, filenames in os.walk('CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/Mecca.csv
CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/total-rain-fall-in-mm-observed-by-pme-met-station-2009.csv
In [310]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
import itertools
import random
import os
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.

In [304]:
# Load the rainfall observations for Mecca (columns: Date, Value — mm per record).
# NOTE(review): hardcoded relative path — assumes the notebook runs from the repo root.
data =  pd.read_csv('CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/Mecca.csv', header=0)
train_percent=0.60  # fraction of windowed samples used for training (rest is test)
num_epochs=150      # training epochs for the LSTM fit below
In [305]:
data
Out[305]:
Date Value
0 1-Jan-01 10.2
1 1-Feb-01 3.1
2 1-Mar-01 6.1
3 1-Apr-01 7.1
4 1-May-01 3.4
... ... ...
1447 1-Aug-21 0.1
1448 1-Sep-21 0.3
1449 1-Oct-21 2.4
1450 1-Nov-21 14.2
1451 1-Dec-21 24.8

1452 rows × 2 columns

In [307]:
# Distinct rainfall values, kept for the random pick further down;
# computed once and reused for both the count and the assignment.
symbols = data['Value'].unique()
print(len(symbols))
248
In [308]:
# Summary statistics of the rainfall series (mm): mean ≈ 6.2 vs. median 3.0,
# i.e. the distribution is right-skewed with many near-zero months.
data['Value'].describe()
Out[308]:
count    1452.000000
mean        6.188085
std         8.052472
min         0.000000
25%         0.200000
50%         3.000000
75%         9.600000
max        52.000000
Name: Value, dtype: float64
In [311]:
#random.seed(42)  #explain!
# NOTE(review): the seed above is commented out, so this pick changes on every run —
# uncomment it for reproducibility. `choosen_symbol` does not appear to be used
# anywhere later in the visible notebook; confirm before deleting.
choosen_symbol = random.choice(symbols)
print(choosen_symbol)
35.5
In [312]:
# Keep only the numeric rainfall column; Date is not used as a model feature.
data_mod = data.drop(['Date'], axis=1)
In [313]:
data_mod
Out[313]:
Value
0 10.2
1 3.1
2 6.1
3 7.1
4 3.4
... ...
1447 0.1
1448 0.3
1449 2.4
1450 14.2
1451 24.8

1452 rows × 1 columns

In [314]:
# Feature Scaling
# NOTE(review): the scaler is fit on the FULL series before the train/test split,
# which leaks test-set min/max statistics into training. Fitting on the training
# portion only would be the methodologically clean choice (changes all downstream numbers).
sc = MinMaxScaler(feature_range=(0, 1))
data_mod_scaled = sc.fit_transform(data_mod)
In [315]:
# Creating a data structure (it does not work when you have only one feature)
def create_data(df, n_future, n_past, train_test_split_percentage, validation_split_percentage):
    """Build sliding-window samples from a scaled series and split them chronologically.

    Parameters
    ----------
    df : 2-D array of shape (n_samples, n_features), already scaled.
    n_future : int — how many steps ahead the target lies (1 = next step).
    n_past : int — window length (number of past steps per sample).
    train_test_split_percentage : float — fraction of windows kept for train (+validation).
    validation_split_percentage : float — fraction of the train portion held out for validation.

    Returns
    -------
    x_train, x_test, x_val, y_train, y_test, y_val as numpy arrays.
    Note the return order: both test arrays precede the validation arrays.
    """
    n_feature = df.shape[1]
    x_data, y_data = [], []

    # One sample per window: x = the n_past rows before i, y = the value n_future-1 steps after i.
    for i in range(n_past, len(df) - n_future + 1):
        x_data.append(df[i - n_past:i, 0:n_feature])
        y_data.append(df[i + n_future - 1:i + n_future, 0])

    # Chronological (non-shuffled) split points — appropriate for time series.
    split_training_test_starting_point = int(round(train_test_split_percentage * len(x_data)))
    split_train_validation_starting_point = int(round(split_training_test_starting_point * (1 - validation_split_percentage)))

    x_train = x_data[:split_train_validation_starting_point]
    y_train = y_data[:split_train_validation_starting_point]

    x_val = x_data[split_train_validation_starting_point:split_training_test_starting_point]
    # BUG FIX: y_val was sliced from x_data, so validation *targets* were whole
    # (n_past, n_feature) input windows instead of the scalar target values.
    y_val = y_data[split_train_validation_starting_point:split_training_test_starting_point]

    x_test = x_data[split_training_test_starting_point:]
    y_test = y_data[split_training_test_starting_point:]

    return np.array(x_train), np.array(x_test), np.array(x_val), np.array(y_train), np.array(y_test), np.array(y_val)
In [316]:
# Number of days you want to predict into the future
# Number of past days you want to use to predict the future
# n_past=25: each sample uses the previous 25 observations; n_future=1: predict the next one.
# validation_split_percentage=0 makes X_val / y_val come back empty —
# Keras' own validation_split is used during fit() instead.
X_train, X_test, X_val, y_train, y_test, y_val = create_data(data_mod_scaled, n_future=1, n_past=25, train_test_split_percentage=train_percent,
                                               validation_split_percentage = 0)
In [317]:
# Sanity-check the split: 856 train / 571 test windows of shape (25 timesteps, 1 feature).
print(X_train.shape)
print(X_test.shape)

print(y_train.shape)
print(y_test.shape)
(856, 25, 1)
(571, 25, 1)
(856, 1)
(571, 1)
In [318]:
# ------------------LSTM-----------------------
# Two stacked 16-unit LSTM layers with dropout, linear head for scaled-value regression.
regressor = Sequential()
regressor.add(LSTM(units=16, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))  # input: (25 timesteps, 1 feature)
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=16, return_sequences=False))  # final layer emits only the last hidden state
regressor.add(Dropout(0.2))
regressor.add(Dense(units=1, activation='linear'))  # single-value regression output
regressor.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])
regressor.summary()
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\backend.py:873: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\optimizers\__init__.py:309: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 25, 16)            1152      
                                                                 
 dropout (Dropout)           (None, 25, 16)            0         
                                                                 
 lstm_1 (LSTM)               (None, 16)                2112      
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense (Dense)               (None, 1)                 17        
                                                                 
=================================================================
Total params: 3281 (12.82 KB)
Trainable params: 3281 (12.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
In [319]:
#es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
#mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
# fit model
# validation_split=0.3 holds out the tail of X_train as validation (Keras slices
# before shuffling). NOTE(review): the val-loss curve plateaus long before epoch 150 —
# the commented-out EarlyStopping callback would save most of the training time.
history = regressor.fit(X_train, y_train, validation_split=0.3, epochs=num_epochs, batch_size=64)
#history = regressor.fit(X_train, y_train, validation_split=0.3, epochs=num_epochs, batch_size=64, callbacks=[es])
Epoch 1/150
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.

10/10 [==============================] - 7s 162ms/step - loss: 0.0324 - root_mean_squared_error: 0.1801 - val_loss: 0.0233 - val_root_mean_squared_error: 0.1525
Epoch 2/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0254 - root_mean_squared_error: 0.1593 - val_loss: 0.0241 - val_root_mean_squared_error: 0.1553
Epoch 3/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0249 - root_mean_squared_error: 0.1577 - val_loss: 0.0228 - val_root_mean_squared_error: 0.1510
Epoch 4/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0241 - root_mean_squared_error: 0.1551 - val_loss: 0.0224 - val_root_mean_squared_error: 0.1497
Epoch 5/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0240 - root_mean_squared_error: 0.1549 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1491
Epoch 6/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0239 - root_mean_squared_error: 0.1545 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1492
Epoch 7/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0230 - root_mean_squared_error: 0.1516 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1491
Epoch 8/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0227 - root_mean_squared_error: 0.1508 - val_loss: 0.0217 - val_root_mean_squared_error: 0.1473
Epoch 9/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0225 - root_mean_squared_error: 0.1499 - val_loss: 0.0213 - val_root_mean_squared_error: 0.1459
Epoch 10/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0219 - root_mean_squared_error: 0.1481 - val_loss: 0.0213 - val_root_mean_squared_error: 0.1460
Epoch 11/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0213 - root_mean_squared_error: 0.1458 - val_loss: 0.0203 - val_root_mean_squared_error: 0.1426
Epoch 12/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0206 - root_mean_squared_error: 0.1435 - val_loss: 0.0203 - val_root_mean_squared_error: 0.1426
Epoch 13/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0202 - root_mean_squared_error: 0.1421 - val_loss: 0.0188 - val_root_mean_squared_error: 0.1369
Epoch 14/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0195 - root_mean_squared_error: 0.1395 - val_loss: 0.0187 - val_root_mean_squared_error: 0.1368
Epoch 15/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0188 - root_mean_squared_error: 0.1373 - val_loss: 0.0173 - val_root_mean_squared_error: 0.1317
Epoch 16/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0181 - root_mean_squared_error: 0.1346 - val_loss: 0.0205 - val_root_mean_squared_error: 0.1431
Epoch 17/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0187 - root_mean_squared_error: 0.1369 - val_loss: 0.0169 - val_root_mean_squared_error: 0.1300
Epoch 18/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0184 - root_mean_squared_error: 0.1355 - val_loss: 0.0160 - val_root_mean_squared_error: 0.1263
Epoch 19/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0171 - root_mean_squared_error: 0.1308 - val_loss: 0.0177 - val_root_mean_squared_error: 0.1331
Epoch 20/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0173 - root_mean_squared_error: 0.1315 - val_loss: 0.0156 - val_root_mean_squared_error: 0.1250
Epoch 21/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0161 - root_mean_squared_error: 0.1268 - val_loss: 0.0171 - val_root_mean_squared_error: 0.1309
Epoch 22/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0159 - root_mean_squared_error: 0.1260 - val_loss: 0.0145 - val_root_mean_squared_error: 0.1205
Epoch 23/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0150 - root_mean_squared_error: 0.1224 - val_loss: 0.0150 - val_root_mean_squared_error: 0.1223
Epoch 24/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0148 - root_mean_squared_error: 0.1215 - val_loss: 0.0144 - val_root_mean_squared_error: 0.1199
Epoch 25/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0146 - root_mean_squared_error: 0.1206 - val_loss: 0.0131 - val_root_mean_squared_error: 0.1146
Epoch 26/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0141 - root_mean_squared_error: 0.1185 - val_loss: 0.0136 - val_root_mean_squared_error: 0.1168
Epoch 27/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0134 - root_mean_squared_error: 0.1158 - val_loss: 0.0135 - val_root_mean_squared_error: 0.1163
Epoch 28/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0133 - root_mean_squared_error: 0.1151 - val_loss: 0.0134 - val_root_mean_squared_error: 0.1158
Epoch 29/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0125 - root_mean_squared_error: 0.1120 - val_loss: 0.0128 - val_root_mean_squared_error: 0.1132
Epoch 30/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0129 - root_mean_squared_error: 0.1134 - val_loss: 0.0126 - val_root_mean_squared_error: 0.1124
Epoch 31/150
10/10 [==============================] - 0s 23ms/step - loss: 0.0127 - root_mean_squared_error: 0.1126 - val_loss: 0.0125 - val_root_mean_squared_error: 0.1116
Epoch 32/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0126 - root_mean_squared_error: 0.1122 - val_loss: 0.0125 - val_root_mean_squared_error: 0.1118
Epoch 33/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0129 - root_mean_squared_error: 0.1137 - val_loss: 0.0120 - val_root_mean_squared_error: 0.1096
Epoch 34/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0124 - root_mean_squared_error: 0.1112 - val_loss: 0.0129 - val_root_mean_squared_error: 0.1138
Epoch 35/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0121 - root_mean_squared_error: 0.1099 - val_loss: 0.0119 - val_root_mean_squared_error: 0.1089
Epoch 36/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0121 - root_mean_squared_error: 0.1100 - val_loss: 0.0126 - val_root_mean_squared_error: 0.1123
Epoch 37/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0118 - root_mean_squared_error: 0.1087 - val_loss: 0.0121 - val_root_mean_squared_error: 0.1100
Epoch 38/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0113 - root_mean_squared_error: 0.1062 - val_loss: 0.0116 - val_root_mean_squared_error: 0.1077
Epoch 39/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0121 - root_mean_squared_error: 0.1099 - val_loss: 0.0113 - val_root_mean_squared_error: 0.1063
Epoch 40/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0114 - root_mean_squared_error: 0.1067 - val_loss: 0.0122 - val_root_mean_squared_error: 0.1102
Epoch 41/150
10/10 [==============================] - 0s 23ms/step - loss: 0.0109 - root_mean_squared_error: 0.1044 - val_loss: 0.0112 - val_root_mean_squared_error: 0.1058
Epoch 42/150
10/10 [==============================] - 0s 24ms/step - loss: 0.0116 - root_mean_squared_error: 0.1077 - val_loss: 0.0114 - val_root_mean_squared_error: 0.1069
Epoch 43/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0117 - root_mean_squared_error: 0.1080 - val_loss: 0.0109 - val_root_mean_squared_error: 0.1043
Epoch 44/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0107 - root_mean_squared_error: 0.1034 - val_loss: 0.0115 - val_root_mean_squared_error: 0.1071
Epoch 45/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0108 - root_mean_squared_error: 0.1041 - val_loss: 0.0109 - val_root_mean_squared_error: 0.1046
Epoch 46/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0114 - root_mean_squared_error: 0.1067 - val_loss: 0.0112 - val_root_mean_squared_error: 0.1057
Epoch 47/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0105 - root_mean_squared_error: 0.1027 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1027
Epoch 48/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0106 - root_mean_squared_error: 0.1031 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1024
Epoch 49/150
10/10 [==============================] - 0s 23ms/step - loss: 0.0106 - root_mean_squared_error: 0.1032 - val_loss: 0.0109 - val_root_mean_squared_error: 0.1044
Epoch 50/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0109 - root_mean_squared_error: 0.1045 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1015
Epoch 51/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0104 - root_mean_squared_error: 0.1021 - val_loss: 0.0108 - val_root_mean_squared_error: 0.1040
Epoch 52/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0106 - root_mean_squared_error: 0.1027 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1015
Epoch 53/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0109 - root_mean_squared_error: 0.1042 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1020
Epoch 54/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0112 - root_mean_squared_error: 0.1059 - val_loss: 0.0106 - val_root_mean_squared_error: 0.1028
Epoch 55/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0099 - root_mean_squared_error: 0.0993 - val_loss: 0.0107 - val_root_mean_squared_error: 0.1035
Epoch 56/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0103 - root_mean_squared_error: 0.1016 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1018
Epoch 57/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0097 - root_mean_squared_error: 0.0982 - val_loss: 0.0102 - val_root_mean_squared_error: 0.1010
Epoch 58/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0100 - root_mean_squared_error: 0.1002 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1026
Epoch 59/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0103 - root_mean_squared_error: 0.1013 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0988
Epoch 60/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1007 - val_loss: 0.0100 - val_root_mean_squared_error: 0.0998
Epoch 61/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0101 - root_mean_squared_error: 0.1003 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1016
Epoch 62/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1005 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1021
Epoch 63/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0098 - root_mean_squared_error: 0.0992 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0980
Epoch 64/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0097 - root_mean_squared_error: 0.0986 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0991
Epoch 65/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0095 - root_mean_squared_error: 0.0973 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0990
Epoch 66/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0096 - root_mean_squared_error: 0.0980 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0974
Epoch 67/150
10/10 [==============================] - 0s 27ms/step - loss: 0.0095 - root_mean_squared_error: 0.0976 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0967
Epoch 68/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1006 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0968
Epoch 69/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0098 - root_mean_squared_error: 0.0989 - val_loss: 0.0097 - val_root_mean_squared_error: 0.0987
Epoch 70/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0097 - root_mean_squared_error: 0.0983 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0973
Epoch 71/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0100 - root_mean_squared_error: 0.1000 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0954
Epoch 72/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0944 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0975
Epoch 73/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0093 - root_mean_squared_error: 0.0962 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970
Epoch 74/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0097 - root_mean_squared_error: 0.0985 - val_loss: 0.0101 - val_root_mean_squared_error: 0.1004
Epoch 75/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1004 - val_loss: 0.0100 - val_root_mean_squared_error: 0.1001
Epoch 76/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0089 - root_mean_squared_error: 0.0945 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970
Epoch 77/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0093 - root_mean_squared_error: 0.0966 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0967
Epoch 78/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0092 - root_mean_squared_error: 0.0960 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970
Epoch 79/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0919 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0949
Epoch 80/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0090 - root_mean_squared_error: 0.0947 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 81/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0941 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945
Epoch 82/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0094 - root_mean_squared_error: 0.0968 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0955
Epoch 83/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0085 - root_mean_squared_error: 0.0924 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945
Epoch 84/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0943 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0977
Epoch 85/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0091 - root_mean_squared_error: 0.0955 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0943
Epoch 86/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0961
Epoch 87/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0088 - root_mean_squared_error: 0.0936 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0924
Epoch 88/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0088 - root_mean_squared_error: 0.0935 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0946
Epoch 89/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0086 - root_mean_squared_error: 0.0928 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0943
Epoch 90/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0936
Epoch 91/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0090 - root_mean_squared_error: 0.0946 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0946
Epoch 92/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945
Epoch 93/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923
Epoch 94/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0976
Epoch 95/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0084 - root_mean_squared_error: 0.0919 - val_loss: 0.0084 - val_root_mean_squared_error: 0.0919
Epoch 96/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0907 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0927
Epoch 97/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0087 - root_mean_squared_error: 0.0933 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0947
Epoch 98/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0911
Epoch 99/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0903 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0927
Epoch 100/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0918 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950
Epoch 101/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0941
Epoch 102/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0088 - root_mean_squared_error: 0.0940 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0920
Epoch 103/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0901 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0933
Epoch 104/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0914 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922
Epoch 105/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0919 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0909
Epoch 106/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923
Epoch 107/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0928
Epoch 108/150
10/10 [==============================] - 0s 18ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922
Epoch 109/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0088 - root_mean_squared_error: 0.0940 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935
Epoch 110/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 111/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0914 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 112/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0087 - root_mean_squared_error: 0.0933 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922
Epoch 113/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0924
Epoch 114/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0928
Epoch 115/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0918 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0934
Epoch 116/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0898 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0930
Epoch 117/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0956
Epoch 118/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0087 - root_mean_squared_error: 0.0931 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0942
Epoch 119/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0914 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0960
Epoch 120/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923
Epoch 121/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0926
Epoch 122/150
10/10 [==============================] - 0s 23ms/step - loss: 0.0083 - root_mean_squared_error: 0.0913 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0912
Epoch 123/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0079 - root_mean_squared_error: 0.0888 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0910
Epoch 124/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0082 - val_root_mean_squared_error: 0.0906
Epoch 125/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0903 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935
Epoch 126/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0082 - root_mean_squared_error: 0.0904 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0913
Epoch 127/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0084 - val_root_mean_squared_error: 0.0914
Epoch 128/150
10/10 [==============================] - 0s 22ms/step - loss: 0.0080 - root_mean_squared_error: 0.0896 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0940
Epoch 129/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0890 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 130/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0898 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0931
Epoch 131/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0959
Epoch 132/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935
Epoch 133/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0938
Epoch 134/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0889 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0938
Epoch 135/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0924 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 136/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0886 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945
Epoch 137/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0080 - root_mean_squared_error: 0.0892 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950
Epoch 138/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0078 - root_mean_squared_error: 0.0881 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0933
Epoch 139/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0961
Epoch 140/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0082 - root_mean_squared_error: 0.0907 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950
Epoch 141/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0079 - root_mean_squared_error: 0.0890 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0944
Epoch 142/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0959
Epoch 143/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0911 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0929
Epoch 144/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0942
Epoch 145/150
10/10 [==============================] - 0s 21ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0980
Epoch 146/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0080 - root_mean_squared_error: 0.0892 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0958
Epoch 147/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0913 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939
Epoch 148/150
10/10 [==============================] - 0s 19ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0093 - val_root_mean_squared_error: 0.0965
Epoch 149/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0079 - root_mean_squared_error: 0.0887 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0934
Epoch 150/150
10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0948
In [320]:
history.history.keys()
Out[320]:
dict_keys(['loss', 'root_mean_squared_error', 'val_loss', 'val_root_mean_squared_error'])
In [321]:
# Side-by-side training curves: RMSE (left) and MSE loss (right), train vs. validation.
fig = plt.figure(figsize=(20, 7))

# Left panel: root-mean-squared error per epoch.
ax_rmse = fig.add_subplot(1, 2, 1)
ax_rmse.plot(history.epoch, history.history['root_mean_squared_error'], label = "rmse")
ax_rmse.plot(history.epoch, history.history['val_root_mean_squared_error'], label = "val_rmse")
ax_rmse.set_title("RMSE", fontsize=18)
ax_rmse.set_xlabel("Epochs", fontsize=15)
ax_rmse.set_ylabel("RMSE", fontsize=15)
ax_rmse.grid(alpha=0.3)
ax_rmse.legend()

# Right panel: MSE loss per epoch.
ax_loss = fig.add_subplot(1, 2, 2)
ax_loss.plot(history.epoch, history.history['loss'], label="loss")
ax_loss.plot(history.epoch, history.history['val_loss'], label="val_loss")
ax_loss.set_title("Loss", fontsize=18)
ax_loss.set_xlabel("Epochs", fontsize=15)
ax_loss.set_ylabel("Loss", fontsize=15)
ax_loss.grid(alpha=0.3)
ax_loss.legend()

plt.show()
In [322]:
# Evaluate the trained model on the held-out set.
results = regressor.evaluate(X_test, y_test)
# Bug fix: the second metric is root-mean-squared-error, not accuracy —
# the previous label "test acc" was misleading.
print("test loss, test rmse:", np.round(results, 4))
18/18 [==============================] - 0s 6ms/step - loss: 0.0140 - root_mean_squared_error: 0.1182
test loss, test acc: [0.014  0.1182]
In [324]:
predictions = regressor.predict(X_test)
18/18 [==============================] - 1s 5ms/step
In [326]:
len(predictions)
Out[326]:
571
In [329]:
from sklearn.metrics import r2_score
r2 = r2_score(y_test, predictions)
print("R-squared (R2) Score:", r2)
R-squared (R2) Score: 0.4263909117557261
In [330]:
LSTM_=pd.DataFrame(y_test , columns=["Test"])
In [334]:
LSTM_["LSTM"]=predictions
In [335]:
LSTM_
Out[335]:
Test LSTM
0 0.000000 0.070894
1 0.001923 0.038783
2 0.001923 0.004139
3 0.003846 -0.015221
4 0.046154 0.025185
... ... ...
566 0.001923 -0.001025
567 0.005769 0.006319
568 0.046154 0.138052
569 0.273077 0.430540
570 0.476923 0.304305

571 rows × 2 columns

In [342]:
pd.DataFrame(data_mod_scaled)[:571]
Out[342]:
0
0 0.196154
1 0.059615
2 0.117308
3 0.136538
4 0.065385
... ...
566 0.076923
567 0.326923
568 0.057692
569 0.000000
570 0.001923

571 rows × 1 columns

In [366]:
print(X_train.shape)
print(X_test.shape)

print(y_train.shape)
print(y_test.shape)
(856, 25, 1)
(571, 25, 1)
(856, 1)
(571, 1)
In [369]:
# Fit ARIMA model
order = (5, 1, 0)  # Example order parameters (p, d, q)
arima_model = sm.tsa.ARIMA(train_data, order=order)
arima_result = arima_model.fit()
In [370]:
seasonal_order = (2, 1, 1, 12)  
sarima_model = sm.tsa.statespace.SARIMAX(train_data, order=order, seasonal_order=seasonal_order)
sarima_result = sarima_model.fit()

arima_forecast = arima_result.forecast(steps=len(test_data))

sarima_forecast = sarima_result.forecast(steps=len(test_data))
In [636]:
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label='Training data')
plt.plot(test_data.index, test_data, label='Test data')
plt.plot(test_data.index, sarima_forecast, label='SARIMA forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.title(' SARIMA Forecasts')
plt.legend()
plt.show()
In [372]:
test_data
Out[372]:
0
1016 0.003846
1017 0.071154
1018 0.580769
1019 0.407692
1020 0.592308
... ...
1447 0.001923
1448 0.005769
1449 0.046154
1450 0.273077
1451 0.476923

436 rows × 1 columns

In [373]:
LSTM_
Out[373]:
Test LSTM
0 0.000000 0.070894
1 0.001923 0.038783
2 0.001923 0.004139
3 0.003846 -0.015221
4 0.046154 0.025185
... ... ...
566 0.001923 -0.001025
567 0.005769 0.006319
568 0.046154 0.138052
569 0.273077 0.430540
570 0.476923 0.304305

571 rows × 2 columns

In [374]:
# Score both statistical models on the test split with a shared helper,
# keeping the individual metric variables for downstream cells.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

arima_forecast = arima_result.forecast(steps=len(test_data))
sarima_forecast = sarima_result.forecast(steps=len(test_data))

def _score(actual, forecast):
    """Return (MAE, MSE, RMSE, R2) for one forecast against the actuals."""
    mae = mean_absolute_error(actual, forecast)
    mse = mean_squared_error(actual, forecast)
    return mae, mse, np.sqrt(mse), r2_score(actual, forecast)

arima_mae, arima_mse, arima_rmse, arima_r2 = _score(test_data, arima_forecast)
sarima_mae, sarima_mse, sarima_rmse, sarima_r2 = _score(test_data, sarima_forecast)

for header, (mae, mse, rmse, r2) in (
    ("ARIMA Model:", (arima_mae, arima_mse, arima_rmse, arima_r2)),
    ("\nSARIMA Model:", (sarima_mae, sarima_mse, sarima_rmse, sarima_r2)),
):
    print(header)
    print("MAE:", mae)
    print("MSE:", mse)
    print("RMSE:", rmse)
    print("R-squared (R2) Score:", r2)
ARIMA Model:
MAE: 0.10894611746410407
MSE: 0.03377255018414353
RMSE: 0.18377309428788408
R-squared (R2) Score: -0.27359553993274344

SARIMA Model:
MAE: 0.058482050507392726
MSE: 0.012407223176341142
RMSE: 0.11138771555401045
R-squared (R2) Score: 0.5321116109331506
In [382]:
ForeCating=pd.DataFrame(sarima_forecast.reset_index() ,columns=["sarima_forecast"])
In [386]:
ForeCating=sarima_forecast.reset_index()
In [388]:
ForeCating.drop(["index"] , axis=1 , inplace=True)
In [391]:
# Bug fix: the nested list ([["sarima_forecast"]]) builds a MultiIndex-style
# (tuple) column label; a flat list assigns the plain column name intended here.
ForeCating.columns = ["sarima_forecast"]
In [392]:
ForeCating
Out[392]:
sarima_forecast
0 0.009516
1 0.078747
2 0.379815
3 0.262313
4 0.229006
... ...
431 0.018702
432 0.019970
433 0.085060
434 0.388992
435 0.270370

436 rows × 1 columns

In [394]:
test
Out[394]:
test ARIMA
Date
1985-09-01 0.003846 0.100064
1985-10-01 0.071154 0.116146
1985-11-01 0.580769 0.156372
1985-12-01 0.407692 0.364222
1986-01-01 0.592308 0.205666
... ... ...
2021-08-01 0.001923 0.041139
2021-09-01 0.005769 0.107671
2021-10-01 0.046154 0.042625
2021-11-01 0.273077 0.122090
2021-12-01 0.476923 0.136350

436 rows × 2 columns

In [300]:
from scipy.stats import kruskal
def seasonality_test(series):
    """Kruskal-Wallis based check for a 12-period (monthly) cycle.

    Parameters
    ----------
    series : pd.Series
        The time series to test; only its values and length are used.

    Returns
    -------
    bool
        True when the H-test p-value is <= 0.05, else False.

    NOTE(review): comparing the series values directly against the raw 0-11
    position indices is statistically questionable; a conventional test groups
    the series values BY month — confirm intent.
    """
    # Bug fix: the flag was misspelled ('seasoanl') at initialization, so the
    # p > 0.05 path raised NameError on 'return seasonal'.
    seasonal = False
    idx = np.arange(len(series.index)) % 12
    H_statistic, p_value = kruskal(series, idx)
    if p_value <= 0.05:
        seasonal = True
    return seasonal
S_test=seasonality_test(Data[Fore])
In [301]:
Data[Fore].plot()
Out[301]:
<Axes: xlabel='Date'>
In [ ]:
data['Date'] = pd.to_datetime(data['Date'])
In [505]:
Data=data.copy()
In [279]:
data=Data.copy()
In [504]:
data
Out[504]:
Value
Date
2001-01-01 10.2
2001-02-01 3.1
2001-03-01 6.1
2001-04-01 7.1
2001-05-01 3.4
... ...
2021-08-01 0.1
2021-09-01 0.3
2021-10-01 2.4
2021-11-01 14.2
2021-12-01 24.8

1452 rows × 1 columns

In [495]:
data.index
Out[495]:
DatetimeIndex(['2001-01-01', '2001-02-01', '2001-03-01', '2001-04-01',
               '2001-05-01', '2001-06-01', '2001-07-01', '2001-08-01',
               '2001-09-01', '2001-10-01',
               ...
               '2021-03-01', '2021-04-01', '2021-05-01', '2021-06-01',
               '2021-07-01', '2021-08-01', '2021-09-01', '2021-10-01',
               '2021-11-01', '2021-12-01'],
              dtype='datetime64[ns]', name='Date', length=1452, freq=None)
In [283]:
type(data.index)
Out[283]:
pandas.core.indexes.datetimes.DatetimeIndex
In [284]:
Data=data.copy()
In [289]:
print(Data.shape)
Train_nu= int(input("The Num u That wanna Train "))
Train=Data[:Train_nu]
Test=Data[Train_nu:]
(1452, 1)
The Num u That wanna Train 440
In [290]:
type(Train.index)
Out[290]:
pandas.core.indexes.datetimes.DatetimeIndex
In [293]:
from scipy.stats import kruskal
def seasonality_test(series):
    """Kruskal-Wallis based check for a 12-period (monthly) cycle.

    Returns True when the H-test p-value is <= 0.05, else False.

    NOTE(review): this cell duplicates an identical definition earlier in the
    notebook — consider defining it once. Also, comparing the series values
    against the raw 0-11 position indices is statistically questionable;
    a conventional test groups the series values BY month — confirm intent.
    """
    # Bug fix: the flag was misspelled ('seasoanl') at initialization, so the
    # p > 0.05 path raised NameError on 'return seasonal'.
    seasonal = False
    idx = np.arange(len(series.index)) % 12
    H_statistic, p_value = kruskal(series, idx)
    if p_value <= 0.05:
        seasonal = True
    return seasonal
S_test=seasonality_test(Data[Fore])
In [292]:
Fore="Value"
In [294]:
Data[Fore].plot()
Out[294]:
<Axes: xlabel='Date'>
In [211]:
"""plt.plot(data.index, data['Value'], label='Value')
plt.title('Time Series Plot')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()"""
Out[211]:
"plt.plot(data.index, data['Value'], label='Value')\nplt.title('Time Series Plot')\nplt.xlabel('Date')\nplt.ylabel('Value')\nplt.legend()\nplt.show()"
In [212]:
"""from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    rolmean = timeseries.rolling(window=12).mean()
    rolstd = timeseries.rolling(window=12).std()

    # Plot rolling statistics:
    plt.figure(figsize=(10, 6))
    orig = plt.plot(timeseries, color='blue', label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label='Rolling Std')
    plt.title('Rolling Statistics')
    plt.legend(loc='best')
    plt.show()

    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

test_stationarity(data['Value'])"""
Out[212]:
"from statsmodels.tsa.seasonal import seasonal_decompose\nfrom statsmodels.tsa.stattools import adfuller\ndef test_stationarity(timeseries):\n    rolmean = timeseries.rolling(window=12).mean()\n    rolstd = timeseries.rolling(window=12).std()\n\n    # Plot rolling statistics:\n    plt.figure(figsize=(10, 6))\n    orig = plt.plot(timeseries, color='blue', label='Original')\n    mean = plt.plot(rolmean, color='red', label='Rolling Mean')\n    std = plt.plot(rolstd, color='black', label='Rolling Std')\n    plt.title('Rolling Statistics')\n    plt.legend(loc='best')\n    plt.show()\n\n    # Perform Dickey-Fuller test:\n    print('Results of Dickey-Fuller Test:')\n    dftest = adfuller(timeseries, autolag='AIC')\n    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])\n    for key, value in dftest[4].items():\n        dfoutput['Critical Value (%s)' % key] = value\n    print(dfoutput)\n\ntest_stationarity(data['Value'])"
In [213]:
"""decomposition = seasonal_decompose(data['Value'], model='additive', period=12)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid"""
Out[213]:
"decomposition = seasonal_decompose(data['Value'], model='additive', period=12)\ntrend = decomposition.trend\nseasonal = decomposition.seasonal\nresidual = decomposition.resid"
In [214]:
"""plt.figure(figsize=(10, 8))
plt.subplot(411)
plt.plot(data['Value'], label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()"""
Out[214]:
"plt.figure(figsize=(10, 8))\nplt.subplot(411)\nplt.plot(data['Value'], label='Original')\nplt.legend(loc='best')\nplt.subplot(412)\nplt.plot(trend, label='Trend')\nplt.legend(loc='best')\nplt.subplot(413)\nplt.plot(seasonal, label='Seasonality')\nplt.legend(loc='best')\nplt.subplot(414)\nplt.plot(residual, label='Residuals')\nplt.legend(loc='best')\nplt.tight_layout()"
In [215]:
"""plt.figure(figsize=(12, 6))
plt.subplot(211)
plt.plot(seasonal)
plt.title('Seasonal Component')
plt.subplot(212)
pd.plotting.autocorrelation_plot(data['Value'])
plt.title('Autocorrelation Plot')
plt.tight_layout()
plt.show()"""
Out[215]:
"plt.figure(figsize=(12, 6))\nplt.subplot(211)\nplt.plot(seasonal)\nplt.title('Seasonal Component')\nplt.subplot(212)\npd.plotting.autocorrelation_plot(data['Value'])\nplt.title('Autocorrelation Plot')\nplt.tight_layout()\nplt.show()"
In [496]:
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA 
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.preprocessing import StandardScaler , MinMaxScaler
import statsmodels.api as sm
import seaborn as sns
import warnings
from pylab import rcParams
from statsmodels.tsa.seasonal import seasonal_decompose
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import month_plot , quarter_plot
from scipy.stats import kruskal
In [506]:
Data
Out[506]:
Value
Date
2001-01-01 10.2
2001-02-01 3.1
2001-03-01 6.1
2001-04-01 7.1
2001-05-01 3.4
... ...
2021-08-01 0.1
2021-09-01 0.3
2021-10-01 2.4
2021-11-01 14.2
2021-12-01 24.8

1452 rows × 1 columns

In [456]:
Data=data_daily[data_daily.columns.tolist()].resample("M").mean()
In [507]:
Data
Out[507]:
Value
Date
2001-01-01 10.2
2001-02-01 3.1
2001-03-01 6.1
2001-04-01 7.1
2001-05-01 3.4
... ...
2021-08-01 0.1
2021-09-01 0.3
2021-10-01 2.4
2021-11-01 14.2
2021-12-01 24.8

1452 rows × 1 columns

In [224]:
plot=seasonal_decompose(data , model="additive" , period=30)
plot.plot(); 
In [225]:
Data=data.copy()
In [106]:
plot=seasonal_decompose(Data , model="additive")
plot.plot(); 
In [508]:
from sklearn.preprocessing import MinMaxScaler

def scale_numeric_columns(data):
    """Min-max scale every non-object column of a DataFrame (or a Series).

    Each numeric column is mapped to [0, 1] via (x - min) / (max - min); a
    constant column (zero range) maps to all zeros, matching sklearn
    MinMaxScaler's zero-range handling.

    Parameters
    ----------
    data : pd.DataFrame or pd.Series
        Input data; object-dtype columns are passed through untouched.

    Returns
    -------
    pd.DataFrame
        Copy of the input (a Series is returned as a one-column frame)
        with numeric columns scaled.
    """
    # Bug fix: a Series used to be converted to a frame AFTER the copy was
    # taken, so the copy stayed a Series and the per-column assignment below
    # corrupted it. Convert first, then copy.
    if isinstance(data, pd.Series):
        data = data.to_frame()
    data_scaled = data.copy()

    for c in data.columns:
        if data[c].dtype != 'object':
            col = data[c].astype(float)
            rng = col.max() - col.min()
            if rng == 0:
                rng = 1.0  # constant column -> all zeros, like MinMaxScaler
            data_scaled[c] = (col - col.min()) / rng

    return data_scaled

Data_scaled = scale_numeric_columns(Data)
Data =pd.DataFrame( Data_scaled["Value"] , columns=["Value"])
In [509]:
Data
Out[509]:
Value
Date
2001-01-01 0.196154
2001-02-01 0.059615
2001-03-01 0.117308
2001-04-01 0.136538
2001-05-01 0.065385
... ...
2021-08-01 0.001923
2021-09-01 0.005769
2021-10-01 0.046154
2021-11-01 0.273077
2021-12-01 0.476923

1452 rows × 1 columns

In [228]:
plot_acf(Data, lags=10); 
In [229]:
plot_pacf(Data , lags=5) ; 
In [231]:
Data
Out[231]:
Value
Date
2001-01-01 0.196154
2001-02-01 0.059615
2001-03-01 0.117308
2001-04-01 0.136538
2001-05-01 0.065385
... ...
2021-08-01 0.001923
2021-09-01 0.005769
2021-10-01 0.046154
2021-11-01 0.273077
2021-12-01 0.476923

1452 rows × 1 columns

In [234]:
axes = Data.plot(marker='.', alpha=0.8, 
                   figsize=(30,25))
In [30]:
lag_plot(Data)
Out[30]:
<Axes: xlabel='y(t)', ylabel='y(t + 1)'>
In [235]:
autocorrelation_plot(Data)
Out[235]:
<Axes: xlabel='Lag', ylabel='Autocorrelation'>
In [510]:
from statsmodels.tsa.stattools import adfuller
def adf_test(series, title=''):
    """Run an Augmented Dickey-Fuller test and print a readable summary.

    Parameters
    ----------
    series : pd.Series
        The time series to test; NaNs are dropped before testing.
    title : str
        Optional label echoed in the header line.
    """
    print(f'Augmented Dickey-fuller Test: {title}')
    result = adfuller(series.dropna(), autolag="AIC")
    labels = ["ADF Test Statistic ", "P-Value ", "Lags Used ", "Observations"]
    out = pd.Series(result[0:4], index=labels)
    for key, val in result[4].items():
        out['Critical Value %s' % key] = val
    # Bug fix: the method was previously printed unbound
    # ("<bound method Series.to_string of ...>"); it must be called.
    print(out.to_string())
    # result[1] is the p-value of the test.
    if result[1] <= 0.05:
        # Message typos fixed ("Evdiance"/"Hypohtesis"/"Week").
        print("Strong evidence against the null hypothesis")
        print("Reject the null hypothesis")
        print("Data has no unit root and is stationary")
    else:
        print("Weak evidence against the null hypothesis")
        print("Fail to reject the null hypothesis")
        print("Data has a unit root and is non-stationary")
In [511]:
adf_test(Data)
Augmented Dickey-fuller Test: 
<bound method Series.to_string of ADF Test Statistic    -5.659423e+00
P-Value                9.454858e-07
Lags Used              2.300000e+01
Observations           1.428000e+03
Critical Value 1%     -3.434938e+00
Critical Value 5%     -2.863566e+00
Critical Value 10%    -2.567849e+00
dtype: float64>
Strong Evdiance Against The Null Hypohtesis
Reject The Null Hypohtesis 
Data Has No Unit Root Its Stationary
In [512]:
data_daily = Data
In [513]:
data_daily
Out[513]:
Value
Date
2001-01-01 0.196154
2001-02-01 0.059615
2001-03-01 0.117308
2001-04-01 0.136538
2001-05-01 0.065385
... ...
2021-08-01 0.001923
2021-09-01 0.005769
2021-10-01 0.046154
2021-11-01 0.273077
2021-12-01 0.476923

1452 rows × 1 columns

In [515]:
baseline = data_daily.rolling(window=10).mean()
plt.figure(figsize=(15,3))
plt.plot(data_daily, c='blue',label='Data ')
plt.plot(baseline, c='red', label='Rolling mean')
plt.legend(fontsize=12)
plt.ylabel('')
plt.margins(x=0)
plt.grid()
In [516]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
In [517]:
# Persistence ("naive lag-1") baseline: predict each value with the previous
# observation, then score it on the chronological 30% test tail.
baseline = data_daily.shift(1)
baseline = baseline.dropna()
data_actual = data_daily.iloc[1:]
size = int(len(data_actual)*0.7)
train = data_daily[:size]
test = data_actual[size:]
baseline_test = baseline.loc[test.index[0]:]

#Plot
plt.plot(test, c='blue',label='Data - Value ')
plt.plot(baseline_test, c='red', label='Persistence Algorithm')
plt.legend(fontsize=12)
# NOTE(review): 'kW' looks copied from another dataset — this series is
# rainfall (mm, min-max scaled). Confirm the intended unit label.
plt.ylabel('kW')
plt.margins(x=0)
plt.title('Test part of the data'), plt.grid(), plt.xticks(rotation=45);

# error metrics
print('RMSE: %.3f' % np.sqrt(mean_squared_error(test, baseline_test)))
MAE = mean_absolute_error(test, baseline_test)
# MAPE divides by the actuals; the series contains zeros, hence the printed
# 'MAPE: inf'.
MAPE = np.mean(np.abs(baseline_test - test)/np.abs(test))
# NOTE(review): np.diff over this (n,1) frame appears to diff along the last
# axis, giving an empty array and a zero denominator — hence 'MASE: inf'.
# Diffing the underlying 1-D values would likely be the intent; confirm.
MASE = np.mean(np.abs(test - baseline_test ))/(np.abs(np.diff(train)).sum()/(len(train)-1))
print('MAE: %.3f' % MAE)
print('MAPE: %.3f' %MAPE)
print('MASE: %.3f' %MASE)
print('R^2 score: %.3f' % r2_score(test, baseline_test))
RMSE: 0.199
MAE: 0.121
MAPE: inf
MASE: inf
R^2 score: -0.487
In [518]:
import math
def Matrix(Y_test, Y_pred):
    """Compute a battery of regression-evaluation metrics.

    Parameters
    ----------
    Y_test : pd.Series or array-like
        Observed values.
    Y_pred : array-like
        Predicted values, same length as Y_test (a (n,1) column is flattened).

    Returns
    -------
    tuple
        (mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI) where
        mbe   : mean bias error (observed - predicted),
        r, R2 : Pearson correlation and its square,
        RRMSE : RMSE as a percentage of the observed mean,
        NSE   : Nash-Sutcliffe efficiency,
        WI    : Willmott-style agreement index on absolute deviations.
    """
    y_test = np.asarray(Y_test, dtype=float)
    y_pred = np.asarray(Y_pred, dtype=float).ravel()
    err = y_test - y_pred

    mse = float(np.mean(err ** 2))
    rmse = math.sqrt(mse)
    mae = float(np.mean(np.abs(err)))
    mbe = float(np.mean(err))

    m_test = float(np.mean(y_test))
    m_pred = float(np.mean(y_pred))
    dev_t = y_test - m_test
    dev_p = y_pred - m_pred

    # Pearson r, vectorized; return NaN instead of raising when either side
    # has zero variance (the previous per-element loops divided blindly).
    down = math.sqrt(float(np.sum(dev_t ** 2)) * float(np.sum(dev_p ** 2)))
    r = float(np.sum(dev_t * dev_p)) / down if down != 0 else float('nan')
    R2 = r * r
    RRMSE = (rmse * 100) / m_test
    NSE = 1 - float(np.sum(err ** 2)) / float(np.sum(dev_t ** 2))
    WI = 1 - float(np.sum(np.abs(err))) / (2 * float(np.sum(np.abs(dev_t))))

    return mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI
In [519]:
baseline_test
Out[519]:
Value
Date
1985-09-01 0.001923
1985-10-01 0.003846
1985-11-01 0.071154
1985-12-01 0.580769
1986-01-01 0.407692
... ...
2021-08-01 0.001923
2021-09-01 0.001923
2021-10-01 0.005769
2021-11-01 0.046154
2021-12-01 0.273077

436 rows × 1 columns

In [520]:
baseline=pd.DataFrame(test)
baseline["baseline"] = baseline_test
In [521]:
test_baseline=test.copy()
In [522]:
test_baseline
Out[522]:
Value baseline
Date
1985-09-01 0.003846 0.001923
1985-10-01 0.071154 0.003846
1985-11-01 0.580769 0.071154
1985-12-01 0.407692 0.580769
1986-01-01 0.592308 0.407692
... ... ...
2021-08-01 0.001923 0.001923
2021-09-01 0.005769 0.001923
2021-10-01 0.046154 0.005769
2021-11-01 0.273077 0.046154
2021-12-01 0.476923 0.273077

436 rows × 2 columns

In [523]:
new_row = pd.DataFrame([Matrix(test.Value ,test["baseline"] )])
Z=["mse", "rmse", "mae", "mbe", "r", "R2", "RRMSE", "NSE", "WI"]
new_row.columns =Z
In [524]:
result = new_row
In [525]:
result
Out[525]:
mse rmse mae mbe r R2 RRMSE NSE WI
0 0.039432 0.198574 0.120505 0.001089 0.252725 0.06387 170.707793 -0.48701 0.513604
In [526]:
rollingMEAN = data_daily.rolling(window=10).mean()
rollingSTD = data_daily.rolling(window=10).std()

fig, (ax1, ax2) = plt.subplots(2,1)
plt.subplots_adjust(hspace=0.4)
ax1.plot(data_daily, c='blue',label='Data ')
ax1.plot(rollingMEAN, c='red', label='Rolling mean')
ax2.plot(rollingSTD, c='black',label = 'Rolling Std')

ax1.legend(fontsize=12), ax2.legend(fontsize=12)
ax1.set_ylabel('kW'), ax2.set_ylabel('kW')
ax1.margins(x=0), ax2.margins(x=0)
ax1.grid(), ax2.grid()
Out[526]:
(None, None)
In [527]:
size = int(len(data_daily)*0.7)
train = data_daily[:size]
test = data_daily[size:]
In [528]:
test
Out[528]:
Value
Date
1985-09-01 0.003846
1985-10-01 0.071154
1985-11-01 0.580769
1985-12-01 0.407692
1986-01-01 0.592308
... ...
2021-08-01 0.001923
2021-09-01 0.005769
2021-10-01 0.046154
2021-11-01 0.273077
2021-12-01 0.476923

436 rows × 1 columns

In [ ]:
 
In [529]:
# Walk-forward (rolling-origin) ARIMA evaluation on a 70/30 chronological split:
# refit the model at every step, forecast `n` points ahead, then slide the
# training window forward by the newly observed points.
# NOTE(review): this rebinds `history`, which earlier in the notebook held the
# Keras training History — a hidden-state hazard on out-of-order re-runs.
n = 1
X = data_daily.values
size = int(len(X) * 0.7)
train, test = X[0:size], X[size:len(X)]
predictions = list()
confidence = list()
history = [x for x in train]
for t in range(0,len(test),n):
    # Refit from scratch each iteration — O(len(test)) model fits, slow but
    # keeps every forecast strictly out-of-sample.
    model = ARIMA(history, order=(2,0,1))
    model_fit = model.fit()
    output = model_fit.forecast(n).tolist()
    # 95% confidence interval for the n-step forecast.
    conf = model_fit.get_forecast(n).conf_int(0.05)
    predictions.extend(output)
    confidence.extend(conf)
    obs = test.tolist()[t:t+n]
    # Fixed-length sliding window: drop the oldest n observations, append the
    # n just-observed test values.
    history = history[n:]
    history.extend(obs);  
conf_int =  np.vstack(confidence) 
In [530]:
m = len(predictions) - len(test)
index_extended = data_daily[size:].index.union(data_daily[size:].index.shift((m))[-(m):])
predictions_series = pd.Series(predictions, index=index_extended)
confidence = pd.DataFrame(conf_int, columns=['lower', 'upper'])
In [531]:
test = pd.DataFrame(test)
In [532]:
test["ARIMA"] = predictions
In [533]:
test.columns=["test" , "ARIMA"]
In [534]:
test.index = Data[size:len(X)].index
In [535]:
test
Out[535]:
test ARIMA
Date
1985-09-01 0.003846 0.100064
1985-10-01 0.071154 0.116146
1985-11-01 0.580769 0.156372
1985-12-01 0.407692 0.364222
1986-01-01 0.592308 0.205666
... ... ...
2021-08-01 0.001923 0.041139
2021-09-01 0.005769 0.107671
2021-10-01 0.046154 0.042625
2021-11-01 0.273077 0.122090
2021-12-01 0.476923 0.136350

436 rows × 2 columns

In [536]:
Matrix(test["test"] ,test["ARIMA"] )
Out[536]:
(0.02424989839764312,
 0.15572378879812526,
 0.10868618039698635,
 -0.0008996842220513843,
 0.3139685042298476,
 0.09857622164832784,
 133.87061538812753,
 0.08551287140995589,
 0.5613105584591628)
In [537]:
new_row = pd.DataFrame([Matrix(test["test"] ,test["ARIMA"] )])
new_row.columns =Z
new_row
Out[537]:
mse rmse mae mbe r R2 RRMSE NSE WI
0 0.02425 0.155724 0.108686 -0.0009 0.313969 0.098576 133.870615 0.085513 0.561311
In [538]:
result_df = pd.concat([result, new_row], axis=0 ,   keys=['baseline' , 'ARIMA'] )

result_df
Out[538]:
mse rmse mae mbe r R2 RRMSE NSE WI
baseline 0 0.039432 0.198574 0.120505 0.001089 0.252725 0.063870 170.707793 -0.487010 0.513604
ARIMA 0 0.024250 0.155724 0.108686 -0.000900 0.313969 0.098576 133.870615 0.085513 0.561311
In [551]:
ForeCating
Out[551]:
sarima_forecast
0 0.009516
1 0.078747
2 0.379815
3 0.262313
4 0.229006
... ...
431 0.018702
432 0.019970
433 0.085060
434 0.388992
435 0.270370

436 rows × 1 columns

In [540]:
LSTM_
Out[540]:
Test LSTM
0 0.000000 0.070894
1 0.001923 0.038783
2 0.001923 0.004139
3 0.003846 -0.015221
4 0.046154 0.025185
... ... ...
566 0.001923 -0.001025
567 0.005769 0.006319
568 0.046154 0.138052
569 0.273077 0.430540
570 0.476923 0.304305

571 rows × 2 columns

In [541]:
# Bug fix: reset_index(inplace=True) mutates `test` and returns None, so the
# previous `test_ = ...` assignment was always None; keep only the mutation.
test.reset_index(inplace=True)
In [542]:
ForeCating
Out[542]:
sarima_forecast
0 0.009516
1 0.078747
2 0.379815
3 0.262313
4 0.229006
... ...
431 0.018702
432 0.019970
433 0.085060
434 0.388992
435 0.270370

436 rows × 1 columns

In [543]:
test.drop(["Date"] , axis=1 , inplace=True)
In [552]:
test["SARIMA"]=ForeCating["sarima_forecast"]
In [553]:
pd.DataFrame(test)
Out[553]:
test ARIMA SARIMA
0 0.003846 0.100064 0.009516
1 0.071154 0.116146 0.078747
2 0.580769 0.156372 0.379815
3 0.407692 0.364222 0.262313
4 0.592308 0.205666 0.229006
... ... ... ...
431 0.001923 0.041139 0.018702
432 0.005769 0.107671 0.019970
433 0.046154 0.042625 0.085060
434 0.273077 0.122090 0.388992
435 0.476923 0.136350 0.270370

436 rows × 3 columns

In [546]:
LSTM_
Out[546]:
Test LSTM
0 0.000000 0.070894
1 0.001923 0.038783
2 0.001923 0.004139
3 0.003846 -0.015221
4 0.046154 0.025185
... ... ...
566 0.001923 -0.001025
567 0.005769 0.006319
568 0.046154 0.138052
569 0.273077 0.430540
570 0.476923 0.304305

571 rows × 2 columns

In [547]:
import math
def Matrix(Y_test, Y_pred):
    """Compute a battery of regression-evaluation metrics.

    NOTE(review): this redefines the identical `Matrix` from an earlier cell —
    consider defining it once near the top of the notebook.

    Parameters
    ----------
    Y_test : pd.Series or array-like
        Observed values.
    Y_pred : array-like
        Predicted values, same length as Y_test (a (n,1) column is flattened).

    Returns
    -------
    tuple
        (mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI) where
        mbe   : mean bias error (observed - predicted),
        r, R2 : Pearson correlation and its square,
        RRMSE : RMSE as a percentage of the observed mean,
        NSE   : Nash-Sutcliffe efficiency,
        WI    : Willmott-style agreement index on absolute deviations.
    """
    y_test = np.asarray(Y_test, dtype=float)
    y_pred = np.asarray(Y_pred, dtype=float).ravel()
    err = y_test - y_pred

    mse = float(np.mean(err ** 2))
    rmse = math.sqrt(mse)
    mae = float(np.mean(np.abs(err)))
    mbe = float(np.mean(err))

    m_test = float(np.mean(y_test))
    m_pred = float(np.mean(y_pred))
    dev_t = y_test - m_test
    dev_p = y_pred - m_pred

    # Pearson r, vectorized; return NaN instead of raising when either side
    # has zero variance (the previous per-element loops divided blindly).
    down = math.sqrt(float(np.sum(dev_t ** 2)) * float(np.sum(dev_p ** 2)))
    r = float(np.sum(dev_t * dev_p)) / down if down != 0 else float('nan')
    R2 = r * r
    RRMSE = (rmse * 100) / m_test
    NSE = 1 - float(np.sum(err ** 2)) / float(np.sum(dev_t ** 2))
    WI = 1 - float(np.sum(np.abs(err))) / (2 * float(np.sum(np.abs(dev_t))))

    return mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI
In [589]:
LSTM_
Out[589]:
Test LSTM
0 0.000000 0.070894
1 0.001923 0.038783
2 0.001923 0.004139
3 0.003846 -0.015221
4 0.046154 0.025185
... ... ...
566 0.001923 -0.001025
567 0.005769 0.006319
568 0.046154 0.138052
569 0.273077 0.430540
570 0.476923 0.304305

571 rows × 2 columns

In [590]:
LSTM = pd.DataFrame([Matrix(LSTM_["Test"] ,LSTM_["LSTM"] )])
LSTM.columns =Z
LSTM
Out[590]:
mse rmse mae mbe r R2 RRMSE NSE WI
0 0.013976 0.118221 0.074762 -0.018328 0.673703 0.453875 104.077459 0.426391 0.684735
In [562]:
test["baseline"]=test_baseline["baseline"].values
In [563]:
test
Out[563]:
test ARIMA SARIMA baseline
0 0.003846 0.100064 0.009516 0.001923
1 0.071154 0.116146 0.078747 0.003846
2 0.580769 0.156372 0.379815 0.071154
3 0.407692 0.364222 0.262313 0.580769
4 0.592308 0.205666 0.229006 0.407692
... ... ... ... ...
431 0.001923 0.041139 0.018702 0.001923
432 0.005769 0.107671 0.019970 0.001923
433 0.046154 0.042625 0.085060 0.005769
434 0.273077 0.122090 0.388992 0.046154
435 0.476923 0.136350 0.270370 0.273077

436 rows × 4 columns

In [591]:
result_df = pd.concat([result, new_row , SARIMA , LSTM], axis=0 ,   keys=['baseline' , 'ARIMA' , "SARIMA" ,"LSTM"] )

result_df
Out[591]:
mse rmse mae mbe r R2 RRMSE NSE WI
baseline 0 0.039432 0.198574 0.120505 0.001089 0.252725 0.063870 170.707793 -0.487010 0.513604
ARIMA 0 0.024250 0.155724 0.108686 -0.000900 0.313969 0.098576 133.870615 0.085513 0.561311
SARIMA 0 0.012407 0.111388 0.058482 -0.002789 0.729863 0.532699 95.756353 0.532112 0.763949
LSTM 0 0.013976 0.118221 0.074762 -0.018328 0.673703 0.453875 104.077459 0.426391 0.684735
In [592]:
result_df.index= ["baseline" , "ARIMA" , "SARIMA" , "LSTM"]
In [593]:
result_df
Out[593]:
mse rmse mae mbe r R2 RRMSE NSE WI
baseline 0.039432 0.198574 0.120505 0.001089 0.252725 0.063870 170.707793 -0.487010 0.513604
ARIMA 0.024250 0.155724 0.108686 -0.000900 0.313969 0.098576 133.870615 0.085513 0.561311
SARIMA 0.012407 0.111388 0.058482 -0.002789 0.729863 0.532699 95.756353 0.532112 0.763949
LSTM 0.013976 0.118221 0.074762 -0.018328 0.673703 0.453875 104.077459 0.426391 0.684735
In [594]:
result_df.rename_axis(index=['Models'] , inplace=True )
In [595]:
Prediction_ = test.copy()
In [596]:
Prediction_
Out[596]:
test ARIMA SARIMA baseline
0 0.003846 0.100064 0.009516 0.001923
1 0.071154 0.116146 0.078747 0.003846
2 0.580769 0.156372 0.379815 0.071154
3 0.407692 0.364222 0.262313 0.580769
4 0.592308 0.205666 0.229006 0.407692
... ... ... ... ...
431 0.001923 0.041139 0.018702 0.001923
432 0.005769 0.107671 0.019970 0.001923
433 0.046154 0.042625 0.085060 0.005769
434 0.273077 0.122090 0.388992 0.046154
435 0.476923 0.136350 0.270370 0.273077

436 rows × 4 columns

In [597]:
# Scatter each model's predictions against the actual test values.
plt.figure()
for model in Prediction_.columns:
    if model != 'test':
        plt.scatter(Prediction_['test'], Prediction_[model], label=model)

plt.xlabel('Original Values')
plt.ylabel('Predicted Values')
plt.title('Scatter Plot of Model Predictions vs. Original Values')
plt.legend()
plt.grid()
# Bug fix: save AFTER adding the legend and grid — savefig captures the
# figure's current state, so saving first exported a PNG without them.
plt.savefig("Scatter Plot of Model Predictions vs. Original Values.png", dpi=400) 
plt.show()

 
In [598]:
prediction_errors = Prediction_.drop(columns='test').subtract(Prediction_['test'], axis=0)

plt.figure()
prediction_errors.boxplot()
plt.xticks(rotation=45)
plt.ylabel('Prediction Errors')
plt.title('Box Plot of Prediction Errors for Each Model')
plt.grid()
plt.savefig("Box Plot of Prediction Errors for Each Model.png", dpi=400) 
plt.show()
In [599]:
# Violin plot of per-model prediction errors (predicted - actual).
plt.figure(figsize=(10, 6))
prediction_errors = Prediction_.drop(columns='test').subtract(Prediction_['test'], axis=0)
# Long format: one (Model, Error) row per observation, as seaborn expects.
prediction_errors = prediction_errors.melt(var_name='Model', value_name='Error')
sns.violinplot(data=prediction_errors, x='Model', y='Error')
plt.xticks(rotation=45)
plt.ylabel('Prediction Errors')
plt.title('Violin Plot of Prediction Errors for Each Model')
plt.grid()
# Bug fix: save AFTER plt.grid() — savefig captures the current state, so
# saving first exported a PNG without the grid.
plt.savefig("Violin Plot of Prediction Errors for Each Model.png", dpi=400) 
plt.show()
In [600]:
plt.figure(figsize=(25, 18))
for model in Prediction_.columns:
    if model != 'test':
        residuals = Prediction_[model] - Prediction_['test']
        plt.plot(Prediction_.index, residuals, label=model)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Data Points')
plt.ylabel('Residuals')
plt.title('Residual Plot of Model Predictions')
plt.legend()
plt.grid()
plt.savefig("Residual Plot of Model Predictions.png", dpi=400)
plt.show()
In [ ]:
 
In [602]:
result = result_df
In [603]:
Features =list( result.columns)
Features
Out[603]:
['mse', 'rmse', 'mae', 'mbe', 'r', 'R2', 'RRMSE', 'NSE', 'WI']
In [604]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
import scipy.stats as stats
import math
In [605]:
metrics_to_plot = Features
num_features = len(metrics_to_plot)
cols = math.ceil(math.sqrt(num_features))
rows = math.ceil(num_features / cols)
plt.figure(figsize=(12, 8))
for i, metric in enumerate(metrics_to_plot):
    plt.subplot(rows, cols, i + 1)
    stats.probplot(result[metric], dist="norm", plot=plt)
    plt.title(f'Q-Q Plot for {metric.upper()}', fontweight='bold')  
    plt.grid()
plt.tight_layout(h_pad=0.5, w_pad=0.5) 
plt.savefig("qq_plots.png", dpi=300) 
plt.tight_layout()
plt.show()
In [606]:
plt.figure(figsize=(12, 8))
sns.heatmap(result[metrics_to_plot], annot=True, fmt=".2f", cmap="YlGnBu")
plt.xlabel('Metrics')
plt.ylabel('Models')
plt.title('Performance Metric Comparison (Heatmap)') 
plt.savefig("Performance Metric Comparison (Heatmap).png", dpi=300) 
plt.xticks(rotation=45)
plt.show()
In [607]:
from pandas.plotting import parallel_coordinates
plt.figure(figsize=(12, 8))
parallel_coordinates(result.reset_index(), 'Models', colormap='viridis')
plt.xlabel('Metrics')
plt.ylabel('Metric Values')
plt.title('Parallel Coordinates Plot of Model Comparison')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.savefig("Parallel Coordinates Plot of Model Comparison.png", dpi=400) 
plt.show()
In [608]:
from math import pi

# Radar (spider) plot: each model traced over min-max normalised metrics
# so every axis shares a 0-1 scale.
normalized_metrics = (result[metrics_to_plot] - result[metrics_to_plot].min()) / (result[metrics_to_plot].max() - result[metrics_to_plot].min())

# The angles depend only on the number of metrics — compute them once.
# (The original recomputed this list on every loop iteration.)
angles = [n / float(len(metrics_to_plot)) * 2 * pi for n in range(len(metrics_to_plot))]
angles += angles[:1]  # repeat the first angle so each polygon closes

plt.figure(figsize=(10, 10))
for model in result.index:
    values = normalized_metrics.loc[model].tolist()
    values += values[:1]  # close the polygon to match the closed angle list
    plt.polar(angles, values, label=model)
plt.title('Radar Plot of Performance Metrics')
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
plt.savefig("Radar Plot of Performance Metrics.png", dpi=400)
plt.show()
In [609]:
predictions_df = Prediction_
In [614]:
Prediction_.columns
Out[614]:
Index(['test', 'ARIMA', 'SARIMA', 'baseline'], dtype='object')
In [610]:
# Actuals vs the baseline and ARIMA forecasts on a shared time axis.
series_to_show = [('test', 'Actual'), ('baseline', 'Baseline'), ('ARIMA', 'ARIMA')]
for column, legend_label in series_to_show:
    plt.plot(predictions_df.index, predictions_df[column], label=legend_label, marker='o')
plt.title('Predictions vs Actual Values')
plt.xlabel('DateTime')
plt.ylabel('Values')
plt.legend()
plt.show()
In [615]:
# All model forecasts overlaid, with the actual series drawn once as a
# dashed black reference line.
for model in Prediction_.columns:
    if model == 'test':
        # Fix: the actuals were previously plotted twice (once in this
        # loop, once again below), producing a duplicate legend entry.
        continue
    plt.plot(predictions_df.index, predictions_df[model], label=model, marker='o')
plt.plot(predictions_df.index, predictions_df['test'], label='Actual', linestyle='--', color='black', marker='o')
plt.title('Predictions vs Actual Values')
plt.xlabel('DateTime')
plt.ylabel('Values')
plt.legend()
plt.show()
In [616]:
# One residual figure per model (actual - prediction) over time.
for model in Prediction_.columns:
    if model == 'test':
        # Fix: test - test is identically zero, so the original emitted a
        # figure carrying no information for the 'test' column.
        continue
    residuals = predictions_df['test'] - predictions_df[model]
    plt.figure(figsize=(12, 6))
    plt.plot(predictions_df.index, residuals, label=f'{model} Residuals', marker='o')
    plt.axhline(y=0, color='red', linestyle='--', label='Zero Residuals')
    plt.title(f'{model} Residuals Analysis')
    plt.xlabel('DateTime')
    plt.ylabel('Residuals')
    plt.legend()
    plt.show()
In [617]:
# Overlaid residual distributions (histogram + KDE) for each model.
plt.figure(figsize=(12, 6))
# Fix: the original read "for model in :" — the iterable was missing,
# which is a SyntaxError (this cell could never have run). Iterate the
# prediction columns like the sibling residual cells do.
for model in Prediction_.columns:
    if model == 'test':
        continue  # actual - actual is all zeros; skip the degenerate distribution
    residuals = predictions_df['test'] - predictions_df[model]
    sns.histplot(residuals, kde=True, label=f'{model} Residuals', bins=30)
plt.title('Distribution of Residuals for Each Model')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.legend()
plt.show()
In [618]:
# Empirical CDFs of the residuals, one curve per column.
plt.figure(figsize=(12, 6))
residuals_by_column = {
    column: predictions_df['test'] - predictions_df[column]
    for column in Prediction_.columns
}
for column, column_residuals in residuals_by_column.items():
    sns.ecdfplot(column_residuals, label=f'{column} Residuals')
plt.title('Cumulative Distribution Function of Residuals for Each Model')
plt.xlabel('Residuals')
plt.ylabel('Cumulative Probability')
plt.legend()
plt.show()
In [619]:
Col=Prediction_.columns
In [622]:
Col
Out[622]:
Index(['test', 'ARIMA', 'SARIMA', 'baseline'], dtype='object')
In [620]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of each model's residuals.
# Fixes vs the original cell:
#  * plot_acf creates its own figure unless given an Axes, so the bare
#    plt.figure() was never drawn on (output showed an empty
#    "Figure size 1200x600 with 0 Axes"); draw onto explicit subplots instead.
#  * a single trailing plt.legend()/title only applied to the last figure;
#    titles/labels are now set per Axes.
#  * 'test' is skipped: test - test is constant zero, whose autocorrelation
#    is undefined.
model_cols = [c for c in Col if c != 'test']
fig, axes = plt.subplots(len(model_cols), 1, figsize=(12, 6), squeeze=False)
for ax, model in zip(axes.ravel(), model_cols):
    residuals = predictions_df['test'] - predictions_df[model]
    plot_acf(residuals, lags=5, alpha=0.05, ax=ax,
             title=f'Autocorrelation of {model} Residuals')
    ax.set_xlabel('Lags')
    ax.set_ylabel('Autocorrelation')
plt.tight_layout()
plt.show()
<Figure size 1200x600 with 0 Axes>
In [621]:
# Predicted value vs residual for each column — a visual check for
# structure (bias, heteroscedasticity) in the errors.
plt.figure(figsize=(12, 8))
for column in Col:
    errors = predictions_df['test'] - predictions_df[column]
    sns.scatterplot(x=predictions_df[column], y=errors, label=f'{column} Residuals', alpha=0.7)

plt.axhline(y=0, color='red', linestyle='--', label='Zero Residuals')
plt.title('Scatter Plot of Predictions vs Residuals')
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.legend()
plt.show()
In [634]:
LSTM_.to_csv("Data_ml/LSTMPrediction.csv", index=False)
In [630]:
result_df.to_csv("Data_ml/Results.csv" , index=False)
In [632]:
Prediction_.to_csv("Data_ml/Forecasting.csv" , index=False)
In [ ]: